options(repos = c(CRAN = "https://cran.r-project.org"))
if(!require('pacman')) install.packages('pacman')
## Loading required package: pacman
pacman::p_load(tidyverse, here, janitor, dplyr, knitr, lubridate, reactable)
rm(list = ls())

FIRST STEPS

Dataset manipulation in ‘bl_schisto_long_25052024_BW’.

load("data/bl_schisto_wide_25052024_BW.Rdata")
reactable(bl_schisto_wide_25052024_BW)

Exclusion of HIV-HBV co-infected patients

library(dplyr)
library(haven)
library(labelled)

bl_schisto_wide_25052024_BW$hiv_status <- as.character(as_factor(bl_schisto_wide_25052024_BW$hiv_status))

bl_schisto_wide_25052024_noCoInf <- bl_schisto_wide_25052024_BW %>% 
  filter(hiv_status == "HBV")

rm(list = "bl_schisto_wide_25052024_BW")
reactable(bl_schisto_wide_25052024_noCoInf)
# what values do we find in the 'hiv_status' column ?
unique_values <- unique(bl_schisto_wide_25052024_noCoInf$hiv_status)
print(unique_values)
## [1] "HBV"
# patient count
bl_schisto_wide_25052024_noCoInf %>%
  arrange(pid) %>% 
  distinct(pid) %>%
  nrow()
## [1] 799

Create ‘CCA_Fb’ dataset with HBV-monoinfected patients who have been tested for CCA (n = 721)

CCA_Fb <- bl_schisto_wide_25052024_noCoInf %>%
  select(pid, cca_res, cca_bl_date, fibs_med_res, fibs_iqr_res, cap_med_res, cap_iqr_res) %>% 
  filter(!is.na(cca_res))

reactable(CCA_Fb)
# checking for duplicates
CCA_Fb %>% 
  get_dupes
## No variable names specified - using all columns.
## No duplicate combinations found of: pid, cca_res, cca_bl_date, fibs_med_res, fibs_iqr_res, cap_med_res, cap_iqr_res
## # A tibble: 0 × 8
## # ℹ 8 variables: pid <chr>, cca_res <fct>, cca_bl_date <date>,
## #   fibs_med_res <dbl>, fibs_iqr_res <dbl>, cap_med_res <dbl>,
## #   cap_iqr_res <dbl>, dupe_count <int>

Create ‘SENB_noCCA’ dataset with patients (both mono- & co-infected) who have never been tested for CCA (n = 127)

# load dataset before exclusion of HIV/HBV patients
load("data/bl_schisto_wide_25052024_BW.Rdata")

SENB_noCCA <- bl_schisto_wide_25052024_BW %>%
  select(pid, enrol_d, cca_res, cca_bl_date, fibs_med_res, fibs_iqr_res, cap_med_res, cap_iqr_res) %>%
  rename(fibs_med = fibs_med_res) %>% 
  rename(fibs_iqr = fibs_iqr_res) %>% 
  rename(cap_med = cap_med_res) %>% 
  rename(cap_iqr = cap_iqr_res) %>% 
  filter(is.na(cca_res))

rm(list = "bl_schisto_wide_25052024_BW")

reactable(SENB_noCCA)
# checking for duplicates
SENB_noCCA %>% 
  get_dupes
## No variable names specified - using all columns.
## No duplicate combinations found of: pid, enrol_d, cca_res, cca_bl_date, fibs_med, fibs_iqr, cap_med, cap_iqr
## # A tibble: 0 × 9
## # ℹ 9 variables: pid <chr>, enrol_d <date>, cca_res <fct>, cca_bl_date <date>,
## #   fibs_med <dbl>, fibs_iqr <dbl>, cap_med <dbl>, cap_iqr <dbl>,
## #   dupe_count <int>

List of SENB patients to be tested for CCA

patient_list <- as.list(SENB_noCCA$pid)
cat(paste(patient_list, collapse = ", "))
## SENB1082, SENB1205, SENB1212, SENB1225, SENB1227, SENB1241, SENB1275, SENB1280, SENB1281, SENB1492, SENB1496, SENB1512, SENB1536, SENB1541, SENB1555, SENB1608, SENB1632, SENB1705, SENB2001, SENB2003, SENB2006, SENB2008, SENB2009, SENB2010, SENB2011, SENB2014, SENB2015, SENB2016, SENB2017, SENB2018, SENB2019, SENB2021, SENB2022, SENB2024, SENB2025, SENB2026, SENB2028, SENB2029, SENB2030, SENB2031, SENB2032, SENB2033, SENB2034, SENB2035, SENB2038, SENB2040, SENB2041, SENB2042, SENB2043, SENB2044, SENB2045, SENB2049, SENB2050, SENB2051, SENB2053, SENB2054, SENB2056, SENB2092, SENB2106, SENB2107, SENB2108, SENB2109, SENB2111, SENB2112, SENB2116, SENB2121, SENB2124, SENB2146, SENB2148, SENB2149, SENB2150, SENB2151, SENB2152, SENB2153, SENB2154, SENB2155, SENB2156, SENB2157, SENB2158, SENB2159, SENB2160, SENB2161, SENB2162, SENB2163, SENB2164, SENB2165, SENB2166, SENB2167, SENB2168, SENB2174, SENB2175, SENB2176, SENB2177, SENB2178, SENB2179, SENB2180, SENB2181, SENB2182, SENB2183, SENB2184, SENB2185, SENB2186, SENB2187, SENB2188, SENB2189, SENB2190, SENB2191, SENB2192, SENB2193, SENB2194, SENB2195, SENB2196, SENB2197, SENB2198, SENB2199, SENB2200, SENB2201, SENB2202, SENB2203, SENB2204, SENB2205, SENB2206, SENB2207, SENB2208, SENB2209, SENB2217, SENB2218

We want a table with the pids but also the enrolment date for the FibroScan so we can find the Urine samples easily in the lab

# import long table
load("data/bl_schisto_long_25052024_BW.Rdata")

# create 'CCA_Fb_long' dataset with variables of interest only
CCA_Fb_long <- bl_schisto_long_25052024_BW %>%   
  select(pid, enrol_d, test_dt, cca_res, fibs_med, fibs_iqr, cap_med, cap_iqr, hiv_status)

# delete unnecessary datasets
rm(list = "bl_schisto_long_25052024_BW")

# unlabelling 'cca_res'
CCA_Fb_long$cca_res <- as.character(as_factor(CCA_Fb_long$cca_res))

# unlabelling 'hiv_status'
CCA_Fb_long$hiv_status <- as.character(as_factor(CCA_Fb_long$hiv_status))

# create 'df2' dataset with fibs_date
df2 <- CCA_Fb_long %>%
  group_by(pid) %>%
  mutate(fibs_date = case_when(!is.na(fibs_med) & !is.na(cap_med) ~ test_dt)) %>% # create variable 'fibs_date'
  fill(fibs_date, .direction = "downup") %>%
  ungroup() %>%
  filter(!is.na(fibs_date)) %>% 
  filter(is.na(cca_res)) %>% 
  select(-test_dt)

# join both datasets
patient_table <- inner_join(df2, SENB_noCCA) %>% 
  select(-cca_bl_date) %>%
  select(pid, enrol_d, cca_res, hiv_status)
## Joining with `by = join_by(pid, enrol_d, cca_res, fibs_med, fibs_iqr, cap_med,
## cap_iqr)`
reactable(patient_table)
write.csv(patient_table, file = "SENB_noCCA_n127.csv")

# delete unnecessary datasets
rm(list = "df2", "CCA_Fb_long")

Create ‘SENBmono_noCCA’ dataset with (mono-infected) patients who have never been tested for CCA (n = 78)

SENBmono_noCCA <- bl_schisto_wide_25052024_noCoInf %>%
  select(pid, cca_res, cca_bl_date, fibs_med_res, fibs_iqr_res, cap_med_res, cap_iqr_res) %>%
  rename(fibs_med = fibs_med_res) %>% 
  rename(fibs_iqr = fibs_iqr_res) %>% 
  rename(cap_med = cap_med_res) %>% 
  rename(cap_iqr = cap_iqr_res) %>% 
  filter(is.na(cca_res))

reactable(SENBmono_noCCA)
# checking for duplicates
SENBmono_noCCA %>% 
  get_dupes
## No variable names specified - using all columns.
## No duplicate combinations found of: pid, cca_res, cca_bl_date, fibs_med, fibs_iqr, cap_med, cap_iqr
## # A tibble: 0 × 8
## # ℹ 8 variables: pid <chr>, cca_res <fct>, cca_bl_date <date>, fibs_med <dbl>,
## #   fibs_iqr <dbl>, cap_med <dbl>, cap_iqr <dbl>, dupe_count <int>

SOCIO-DEMOGRAPHICS

Create temporary dataset with all socio-demographic variables

sociodemo1 <- bl_schisto_wide_25052024_noCoInf %>%
  select(1:23) %>%
  mutate(country_orig = case_when(is.na(country_orig) ~ "Senegal",              # rename all 'NA' values in `country_orig` to 'Senegal'
                                  !is.na(country_orig) ~ country_orig)) %>%
  mutate(region_orig = case_when(!is.na(region_orig) ~ region_orig,             # rename all 'NA' values in `region_orig` to their country of origin
                                 is.na(region_orig) ~ country_orig)) %>%
  mutate(dpt_orig = case_when(!is.na(dpt_orig) ~ dpt_orig,                      # rename all 'NA' values in `dpt_orig` to their country of origin
                                 is.na(dpt_orig) ~ country_orig)) %>%
  select(-sen_yn)

reactable(sociodemo1)

Select for variables of interest only

sociodemo2 <- sociodemo1 %>% 
  select(pid, age_en, sex, country_orig, region_orig, reg_act, employ, other_employ) %>% 
  mutate(employ = case_when(employ != "other" ~ employ,
                            employ == "other" ~ str_to_lower(other_employ))) %>% 
  select(-other_employ)

reactable(sociodemo2)
rm(list = "sociodemo1")

Analysis

# age
## define median
median_age <- median(sociodemo2$age_en, na.rm = TRUE)

## define categories
age_cat <- sociodemo2 %>% 
  mutate(age_en = case_when(age_en < 31 ~ "18-31",
                         age_en >= 31 ~ ">31"))
age_cat %>%
  tabyl(age_en) %>%
  adorn_totals("row") %>% 
  adorn_pct_formatting(digits = 2)
##  age_en   n percent
##   18-31 384  48.06%
##     >31 415  51.94%
##   Total 799 100.00%
# sex
sociodemo2 %>% 
  tabyl(sex) %>%
  adorn_totals("row") %>% 
  adorn_pct_formatting(digits = 2)
##     sex   n percent
##    Male 427  53.44%
##  Female 372  46.56%
##   Total 799 100.00%
# origin
## define categories
orig_cat <- sociodemo2 %>% 
  mutate(orig_endemicity = case_when(
    region_orig %in% c("Cameroun", "Benin", "Guinea", "Guinea_Bissau", "Ivory_Coast", "Mali", "Mauritania") ~ "Other Country",
    region_orig %in% c("Kédougou", "Tambacounda", "Matam", "Saint_Louis", "Louga", "Sédhiou", "Kolda") ~ "≥50% prevalence (high)",
    region_orig %in% c("Ziguinchor", "Kaffrine", "Kaolack", "Fatick", "Diurbel", "Thiès") ~ "1-49% prevalence (moderate)",
    region_orig == "Dakar" ~ "<1% prevalence (non-endemic)"
  ))

orig_cat %>%
  tabyl(orig_endemicity) %>%
  adorn_totals("row") %>% 
  adorn_pct_formatting(digits = 2)
##               orig_endemicity   n percent
##   1-49% prevalence (moderate) 419  52.44%
##  <1% prevalence (non-endemic) 271  33.92%
##                 Other Country  17   2.13%
##        ≥50% prevalence (high)  92  11.51%
##                         Total 799 100.00%
# reg_act
reg_act_cat <- sociodemo2 %>% 
  mutate(reg_act = case_when(reg_act == "Dakar" ~ "Dakar",
                                 reg_act != "Dakar" ~ "Outside"))

reg_act_cat %>% 
  tabyl(reg_act) %>%
  adorn_totals("row") %>% 
  adorn_pct_formatting(digits = 2)
##  reg_act   n percent
##    Dakar 642  80.35%
##  Outside 157  19.65%
##    Total 799 100.00%
# profession ?

rm(list = "age_cat", "orig_cat", "reg_act_cat")

Creation of final sociodemographics table

sociodemo_final <- sociodemo2 %>%
  mutate(age_en = case_when(
    age_en < 31 ~ "18-31",
    age_en >= 31 ~ ">31"
  )) %>%
  mutate(orig_endemicity = case_when(
    region_orig %in% c("Cameroun", "Benin", "Guinea", "Guinea_Bissau", "Ivory_Coast", "Mali", "Mauritania") ~ "Other Country",
    region_orig %in% c("Kédougou", "Tambacounda", "Matam", "Saint_Louis", "Louga", "Sédhiou", "Kolda") ~ "≥50% prevalence (high)",
    region_orig %in% c("Ziguinchor", "Kaffrine", "Kaolack", "Fatick", "Diurbel", "Thiès") ~ "1-49% prevalence (moderate)",
    region_orig == "Dakar" ~ "<1% prevalence (non-endemic)"
  )) %>% 
  mutate(reg_act = case_when(reg_act == "Dakar" ~ "Dakar",
                                 reg_act != "Dakar" ~ "Outside")) %>% 
  rename(age_cat = age_en) %>% 
  select(pid, age_cat, sex, country_orig, region_orig, orig_endemicity, reg_act, employ)

reactable(sociodemo_final)
rm(list = "sociodemo2")

COMORBIDITIES

Create temporary dataset with all comorbidities variables

# recreate variable 'HTA' following definition : Confirmed arterial systolic pressure ≥140 mmHg and/or arterial diastolic pressure ≥90 mmHg, or any anti-hypertensive treatment before inclusion in SEN-B (source : WHO, https://www.who.int/news-room/fact-sheets/detail/hypertension, updated 16 March 2023)
bl_schisto_wide_25052024_noCoInf <- bl_schisto_wide_25052024_noCoInf %>% 
  mutate(HTA_diff = aHTA_start - enrol_d) %>%
  mutate(HTA = case_when(!is.na(HTA_diff) ~ "yes",
                         tas >= 140 ~ "yes",
                         tad >= 90 ~ "yes",
                         TRUE ~ "no")) %>% 
  select(-HTA_diff)

# recreate variable 'diabete' ?
bl_schisto_wide_25052024_noCoInf %>% 
  select(diabete, glu_res) %>% 
  filter(diabete == "yes")
## # A tibble: 22 × 2
##    diabete glu_res
##    <fct>     <dbl>
##  1 yes        1.4 
##  2 yes        1.96
##  3 yes        1.69
##  4 yes        0.7 
##  5 yes        0.75
##  6 yes        2.2 
##  7 yes        0.74
##  8 yes        2.43
##  9 yes        1.63
## 10 yes        0.86
## # ℹ 12 more rows
# create variable 'dyslipidemia' : Total cholesterol >2g/l, HDL-c <0.4 g/l in men and <0.5 g/l in women, triglycerides >1.5 g/l, or LDL-cholesterol >1 g/l (source : https://www.ncbi.nlm.nih.gov/books/NBK560891/, updated 4 March 2024)
bl_schisto_wide_25052024_noCoInf <- bl_schisto_wide_25052024_noCoInf %>%
  mutate(dyslipidemia = case_when(colt_res > 2 ~ "yes",
                                  trg_res > 1.5 ~ "yes",
                                  ldl_res > 1 ~ "yes",
                                  sex == "Female" & hdl_res < 0.4 ~ "yes",
                                  sex == "Male" & hdl_res < 0.5 ~ "yes",
                                  TRUE ~ "no"))

# round 'BMI' to only 1 digit
bl_schisto_wide_25052024_noCoInf$BMI <- round(bl_schisto_wide_25052024_noCoInf$BMI, 1)

como1 <- bl_schisto_wide_25052024_noCoInf %>% 
  select(pid, sex, alcohol_cons, audit_tot, drugs, smoke_yn, HTA, diabete, BMI, dyslipidemia, COL, AVC, renal, cancer, liver_dis_FDR, hcc_FDR)

reactable(como1)

Analysis

# alcohol consumption
como1 %>% 
  tabyl(alcohol_cons) %>% 
  adorn_totals("row") %>% 
  adorn_pct_formatting(digits = 2)
##  alcohol_cons   n percent
##            no 744  93.12%
##       yes_now  34   4.26%
##      yes_past  21   2.63%
##         Total 799 100.00%
# audit-c (https://www.ncbi.nlm.nih.gov/pmc/articles/PMC10163557/)
audit_cat <- como1 %>% 
  mutate(audit_cat = case_when(sex == "Female" & audit_tot >= 3 ~ "Misuse",
                               sex == "Male" & audit_tot >= 4 ~ "Misuse",
                               audit_tot >= 10 ~ "Addiction",
                               TRUE ~ "Negative"))

audit_cat %>% 
  tabyl(audit_cat) %>% 
  adorn_totals("row") %>% 
  adorn_pct_formatting(digits = 2)
##  audit_cat   n percent
##     Misuse  15   1.88%
##   Negative 784  98.12%
##      Total 799 100.00%
# drugs
como1 %>% 
  tabyl(drugs) %>% 
  adorn_totals("row") %>% 
  adorn_pct_formatting(digits = 2)
##     drugs   n percent
##        no 786  98.37%
##  yes_now    0   0.00%
##  yes_past  13   1.63%
##     Total 799 100.00%
# smoking
como1 %>% 
  tabyl(smoke_yn) %>% 
  adorn_totals("row") %>% 
  adorn_pct_formatting(digits = 2)
##  smoke_yn   n percent
##        no 710  88.86%
##   yes_now  18   2.25%
##  yes_past  71   8.89%
##     Total 799 100.00%
# HTA
como1 %>% 
  tabyl(HTA) %>% 
  adorn_totals("row") %>% 
  adorn_pct_formatting(digits = 2)
##    HTA   n percent
##     no 607  75.97%
##    yes 192  24.03%
##  Total 799 100.00%
# diabete
como1 %>% 
  tabyl(diabete) %>% 
  adorn_totals("row") %>% 
  adorn_pct_formatting(digits = 2)
##  diabete   n percent
##       no 777  97.25%
##      yes  22   2.75%
##    Total 799 100.00%
# BMI (https://www.ncbi.nlm.nih.gov/books/NBK541070/)
BMI_cat <- como1 %>% 
  mutate(BMI_cat = case_when(BMI < 18.5 ~ "underweight",
                             BMI >= 18.5 & BMI < 25 ~ "normal weight",
                             BMI >= 25 & BMI < 30 ~ "overweight",
                             BMI >= 30 ~ "obesity"))

BMI_cat %>% 
  tabyl(BMI_cat) %>% 
  adorn_totals("row") %>% 
  adorn_pct_formatting(digits = 2)
##        BMI_cat   n percent valid_percent
##  normal weight 459  57.45%        58.03%
##        obesity  59   7.38%         7.46%
##     overweight 156  19.52%        19.72%
##    underweight 117  14.64%        14.79%
##           <NA>   8   1.00%             -
##          Total 799 100.00%       100.00%
# dyslipidemia
como1 %>% 
  tabyl(dyslipidemia) %>% 
  adorn_totals("row") %>% 
  adorn_pct_formatting(digits = 2)
##  dyslipidemia   n percent
##            no 482  60.33%
##           yes 317  39.67%
##         Total 799 100.00%
# variable 'COL' (?)
como1 %>% 
  tabyl(COL) %>% 
  adorn_totals("row") %>% 
  adorn_pct_formatting(digits = 2)
##    COL   n percent
##     no 796  99.62%
##    yes   3   0.38%
##  Total 799 100.00%
# AVC
como1 %>% 
  tabyl(AVC) %>% 
  adorn_totals("row") %>% 
  adorn_pct_formatting(digits = 2)
##    AVC   n percent
##     no 799 100.00%
##    yes   0   0.00%
##  Total 799 100.00%
# variable 'renal' (?)
como1 %>% 
  tabyl(renal) %>% 
  adorn_totals("row") %>% 
  adorn_pct_formatting(digits = 2)
##  renal   n percent
##     no 799 100.00%
##    yes   0   0.00%
##  Total 799 100.00%
# Cancer
como1 %>% 
  tabyl(cancer) %>% 
  adorn_totals("row") %>% 
  adorn_pct_formatting(digits = 2)
##  cancer   n percent
##      no 798  99.87%
##     yes   1   0.13%
##   Total 799 100.00%
# Liver Disease
como1 %>% 
  tabyl(liver_dis_FDR) %>% 
  adorn_totals("row") %>% 
  adorn_pct_formatting(digits = 2)
##  liver_dis_FDR   n percent
##             no 784  98.12%
##            yes  15   1.88%
##          Total 799 100.00%
# CHC Familial
como1 %>% 
  tabyl(hcc_FDR) %>% 
  adorn_totals("row") %>% 
  adorn_pct_formatting(digits = 2)
##  hcc_FDR   n percent
##       no 698  87.36%
##      yes 101  12.64%
##    Total 799 100.00%
# supprimer les objets crées de l'environnement
rm(list = "audit_cat", "BMI_cat")

# supprimer les variables peu intéressantes après l'anaylse descriptive
como2 <- como1 %>%
  select(pid, sex, alcohol_cons, audit_tot, drugs, smoke_yn, HTA, diabete, BMI, dyslipidemia, liver_dis_FDR, hcc_FDR) %>%
  mutate(audit = case_when(sex == "Female" & audit_tot >= 3 ~ "Misuse",
                               sex == "Male" & audit_tot >= 4 ~ "Misuse",
                               audit_tot >= 10 ~ "Addiction",
                               TRUE ~ "Negative")) %>% 
  mutate(BMI = case_when(BMI < 18.5 ~ "underweight",
                             BMI >= 18.5 & BMI < 25 ~ "normal weight",
                             BMI >= 25 & BMI < 30 ~ "overweight",
                             BMI >= 30 ~ "obesity")) %>% 
  select(pid, sex, alcohol_cons, audit, drugs, smoke_yn, HTA, diabete, BMI, dyslipidemia, liver_dis_FDR, hcc_FDR)

reactable(como2)
rm(list = "como1")

Join sociodemographics & comorbidities datasets

sociodemo_como <- left_join(sociodemo_final, como2, by = "pid") %>% 
  rename(sex = sex.x) %>% 
  select(-sex.y)

# delete unnecessary tables
rm(list = "sociodemo_final", "como2")

I forgot to add the “dipstick EtG” variable…

bl_schisto_wide_25052024_noCoInf
## # A tibble: 799 × 224
##    pid      hiv_status enrol_d    center med   sen_yn country_orig region_orig
##    <chr>    <chr>      <date>     <fct>  <chr> <fct>  <fct>        <fct>      
##  1 SENB1002 HBV        2019-09-30 SMIT   1     yes    <NA>         Fatick     
##  2 SENB1003 HBV        2019-10-01 SMIT   1     yes    <NA>         Dakar      
##  3 SENB1004 HBV        2019-10-01 SMIT   1     yes    <NA>         Dakar      
##  4 SENB1005 HBV        2019-10-01 SMIT   1     yes    <NA>         Dakar      
##  5 SENB1006 HBV        2019-10-01 SMIT   1     yes    <NA>         Dakar      
##  6 SENB1007 HBV        2019-10-02 SMIT   1     yes    <NA>         Ziguinchor 
##  7 SENB1008 HBV        2019-10-02 SMIT   1     yes    <NA>         Kaolack    
##  8 SENB1009 HBV        2019-10-02 SMIT   1     yes    <NA>         Louga      
##  9 SENB1010 HBV        2019-10-02 SMIT   1     yes    <NA>         Diurbel    
## 10 SENB1011 HBV        2019-10-02 SMIT   1     no     Mali         <NA>       
## # ℹ 789 more rows
## # ℹ 216 more variables: dpt_orig <fct>, res_time <dbl>, reg_act <fct>,
## #   dpt_act <fct>, birth_d <date>, age_en <dbl>, sex <fct>, rank <dbl>,
## #   marital <fct>, union <fct>, child <dbl>, education <fct>, employ <fct>,
## #   other_employ <chr>, health_ins <fct>, hiv_yn <fct>, type_HIV <chr>,
## #   hiv_test1 <fct>, hiv_test1_date <date>, hiv_test2 <fct>,
## #   hiv_test2_date <date>, CD4_last <dbl>, CD4_last_date <date>, …
# create table with 'pid' and 'etg_res'
dipstick_etg <- bl_schisto_wide_25052024_noCoInf %>% 
  select(pid, etg_res)

# join 'dipstick_etg' with sociodemo_como
sociodemo_como <- left_join(sociodemo_como, dipstick_etg, by = "pid") %>% 
  select(pid, age_cat, sex, country_orig, region_orig, orig_endemicity, reg_act, employ, alcohol_cons, audit, etg_res,
         drugs, smoke_yn, HTA, diabete, BMI, dyslipidemia, liver_dis_FDR, hcc_FDR)

# delete 'dipstick_etg' table
rm(list = "dipstick_etg")

Now we got the full sociodemographics + comorbidities table

reactable(sociodemo_como)

HBV MARKERS

Create temporary dataset with all ‘HBV markers’ variables

hbv_markers <- bl_schisto_wide_25052024_noCoInf %>% 
  select(pid, enrol_d, cvb_res, AgHBs_res, qAgHBs_res, AgHBe_res, AcHBe_res) 

# should I add AcHBs_res too ? lots of missing data (n=132)
bl_schisto_wide_25052024_noCoInf %>% 
  tabyl(AcHBs_res)
##  AcHBs_res   n    percent valid_percent
##   Positive   8 0.01001252      0.011994
##   Negative 659 0.82478098      0.988006
##       <NA> 132 0.16520651            NA
# create variable "screen_d" with just the year
hbv_markers <- hbv_markers %>% 
  mutate(screen_d = year(enrol_d)) %>% 
  select(-enrol_d) %>% 
  select(pid, screen_d, cvb_res, AgHBs_res, qAgHBs_res, AgHBe_res, AcHBe_res)

# categorize variable 'cvb_res' : <20, 20-2000, >2000 IU/mL (source : Hofmann et al., Hepatitis B Virus (HBV) Replication During Tenofovir Therapy Is Frequent in Human Immunodeficiency Virus/HBV Coinfection, Clinical Infectious Diseases, Volume 76, Issue 4, 15 February 2023, Pages 730–733, https://doi.org/10.1093/cid/ciac823)
hbv_markers <- hbv_markers %>% 
  mutate(cvb_detection = case_when(cvb_res < 20 ~ "HBV Suppression",
                             cvb_res >2000 ~ "High-level Viremia",
                             TRUE ~ "Low-level Viremia")) %>% 
  select(pid, screen_d, cvb_res, cvb_detection, AgHBs_res, qAgHBs_res, AgHBe_res, AcHBe_res)

# categorize variable 'qAgHBs_res' : <100, 100-1000, >1000 (source : Ouzan, 2014)
hbv_markers <-  hbv_markers %>% 
  mutate(qAgHBs_cat = case_when(qAgHBs_res < 100 ~ "<100 IU/mL",
                                qAgHBs_res > 1000 ~ ">1000 IU/mL",
                                TRUE ~ "100-1000 IU/mL")) %>% 
  select(pid, screen_d, cvb_res, cvb_detection, AgHBs_res, qAgHBs_res, qAgHBs_cat, AgHBe_res, AcHBe_res)

reactable(hbv_markers)

Analysis

# Screening date
hbv_markers %>% 
  tabyl(screen_d) %>% 
  adorn_totals("row") %>% 
  adorn_pct_formatting(digits = 2)
##  screen_d   n percent
##      2019  82  10.26%
##      2020  59   7.38%
##      2021 409  51.19%
##      2022 243  30.41%
##      2023   6   0.75%
##     Total 799 100.00%
# HBV DNA viral load
hbv_markers %>% 
  tabyl(cvb_detection) %>% 
  adorn_totals("row") %>% 
  adorn_pct_formatting(digits = 2)
##       cvb_detection   n percent
##  High-level Viremia 214  26.78%
##   Low-level Viremia 585  73.22%
##               Total 799 100.00%
# AgHBs
hbv_markers %>% 
  tabyl(AgHBs_res) %>% 
  adorn_totals("row") %>% 
  adorn_pct_formatting(digits = 2)
##  AgHBs_res   n percent
##   Positive 781  97.75%
##   Negative  18   2.25%
##      Total 799 100.00%
# qAgHBs
hbv_markers %>% 
  tabyl(qAgHBs_cat) %>% 
  adorn_totals("row") %>% 
  adorn_pct_formatting(digits = 2)
##      qAgHBs_cat   n percent
##  100-1000 IU/mL 101  12.64%
##      <100 IU/mL  71   8.89%
##     >1000 IU/mL 627  78.47%
##           Total 799 100.00%
# AgHbe
hbv_markers %>% 
  tabyl(AgHBe_res) %>% 
  adorn_totals("row") %>% 
  adorn_pct_formatting(digits = 2)
##  AgHBe_res   n percent
##   Positive  19   2.38%
##   Negative 780  97.62%
##      Total 799 100.00%
# AcHBe
hbv_markers %>% 
  tabyl(AcHBe_res) %>% 
  adorn_totals("row") %>% 
  adorn_pct_formatting(digits = 2)
##  AcHBe_res   n percent
##   Positive 766  95.87%
##   Negative  33   4.13%
##      Total 799 100.00%

Join ‘hbv_markers’ with ‘sociodemo_como’

SchistoHBV <- left_join(sociodemo_como, hbv_markers, by = "pid") %>% 
  select(pid, screen_d, age_cat, sex, country_orig, region_orig,
         orig_endemicity, reg_act, employ, alcohol_cons, audit, 
         etg_res, drugs, smoke_yn, HTA, diabete, BMI, dyslipidemia, 
         liver_dis_FDR, hcc_FDR, cvb_detection, AgHBs_res, 
         qAgHBs_cat, AgHBe_res, AcHBe_res)

rm(list = "sociodemo_como", "hbv_markers")

reactable(SchistoHBV)

HCV/HDV MARKERS

Create temporary dataset with all ‘HCV/HDV markers’ variables

hcv_hdv_markers <- bl_schisto_wide_25052024_noCoInf %>% 
  select(pid, AcVHC_res, AcVHD_res)

reactable(hcv_hdv_markers)

Analysis

# HCV
hcv_hdv_markers %>% 
  tabyl(AcVHC_res) %>% 
  adorn_totals("row") %>% 
  adorn_pct_formatting(digits = 2)
##  AcVHC_res   n percent
##   Positive   2   0.25%
##   Negative 797  99.75%
##      Total 799 100.00%
# HDV
hcv_hdv_markers %>% 
  tabyl(AcVHD_res) %>% 
  adorn_totals("row") %>% 
  adorn_pct_formatting(digits = 2)
##  AcVHD_res   n percent valid_percent
##   Positive  10   1.25%         1.25%
##   Negative 787  98.50%        98.75%
##       <NA>   2   0.25%             -
##      Total 799 100.00%       100.00%

TDF Treatment

Create temporary dataset with all ‘TDF’ variables

TDF_ttt <- bl_schisto_wide_25052024_noCoInf %>% 
  select(pid, enrol_d, on_TDF_bl, TDF_start) %>% 
  mutate(on_TDF_bl = case_when(is.na(on_TDF_bl) ~ "No",
                               on_TDF_bl == "FALSE" ~"No",
                               on_TDF_bl == "TRUE" ~ "Yes")) %>% 
  mutate(TDF_duration = case_when(on_TDF_bl == "No" ~ 0,
                                  on_TDF_bl == "Yes" ~ as.numeric(TDF_start - enrol_d))) %>% 
  select(-TDF_start) %>% 
  select(-enrol_d)

reactable(TDF_ttt)

Analysis

TDF_ttt %>% 
  tabyl(on_TDF_bl) %>% 
  adorn_totals("row") %>% 
  adorn_pct_formatting(digits = 2)
##  on_TDF_bl   n percent
##         No 749  93.74%
##        Yes  50   6.26%
##      Total 799 100.00%
# table of patients treated before baseline
patients_TDF_bl <- TDF_ttt %>% 
  filter(on_TDF_bl == "Yes")

# mean TDF duration at baseline
summary_TDF_duration <- patients_TDF_bl %>%
  summarise(
    mean_TDF_duration = mean(abs(as.numeric(TDF_duration)), na.rm = TRUE),
    CI95 = t.test(abs(as.numeric(TDF_duration)), conf.level = 0.95, na.rm = TRUE)$conf.int
  )
## Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
## dplyr 1.1.0.
## ℹ Please use `reframe()` instead.
## ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
##   always returns an ungrouped data frame and adjust accordingly.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
mean_TDF_duration <- summary_TDF_duration$mean_TDF_duration
CI95_lower <- summary_TDF_duration$CI95[1]
CI95_upper <- summary_TDF_duration$CI95[2]

mean_TDF_duration
## [1] 636.94 636.94
CI95_lower
## [1] 339.4184
CI95_upper
## [1] 934.4616
rm(list = "summary_TDF_duration", "patients_TDF_bl")

Join ‘TDF_ttt’ with ‘hcv_hdv_markers’ and then with the ‘SchistoHBV’ dataset

df <- left_join(hcv_hdv_markers, TDF_ttt, by = "pid")

SchistoHBV <- left_join(SchistoHBV, df, by = "pid") %>% 
  mutate(employ = str_to_title(employ),
         alcohol_cons = str_to_title(alcohol_cons),
         drugs = str_to_title(drugs),
         smoke_yn = str_to_title(smoke_yn),
         HTA = str_to_title(HTA),
         diabete = str_to_title(diabete),
         BMI = str_to_title(BMI),
         dyslipidemia = str_to_title(dyslipidemia),
         liver_dis_FDR = str_to_title(liver_dis_FDR),
         hcc_FDR = str_to_title(hcc_FDR))

reactable(SchistoHBV)
rm(list = "df", "TDF_ttt", "hcv_hdv_markers")

LIVER MARKERS

Create temporary dataset with all liver markers

liver_markers <- bl_schisto_wide_25052024_noCoInf %>% 
  select(pid, sex, alt_res, ast_res, pal_res, ggt_res, plt_res, blb_res, tp_res, alb_res, fibs_med_res, cap_med_res) %>%
  mutate(apri_score = ((ast_res/40)*100)/plt_res*1000) %>%
  mutate(apri_score = round(apri_score, 2)) %>% 
  # recategorize variables
  mutate(alt_res = case_when(sex == "Male" & alt_res > 50 ~ "High",
                             sex == "Female" & alt_res > 40 ~ "High",
                             TRUE ~ "Normal"),
         apri_diag = case_when(apri_score > 1.5 ~ "Severe Fibrosis",
                               apri_score < 0.5 ~ "No Fibrosis",
                               is.na(apri_score) ~ "NA",
                             TRUE ~ "Mild Fibrosis"),
         pal_res = case_when(pal_res > 300 ~ ">300 IU/L",     # liver disease
                             is.na(pal_res) ~ "NA",
                             TRUE ~ "Normal"),
         ggt_res = case_when(ggt_res > 50 ~ ">50 IU/L",
                             is.na(ggt_res) ~ "NA",
                             TRUE ~ "Normal"),
         blb_res = case_when(blb_res > 1.1 ~ ">1.1 mg/L",
                             is.na(blb_res) ~ "NA",
                             TRUE ~ "Normal"),
         tp_res = case_when(tp_res < 60 ~ "<60 g/L",       # liver or kidney disease
                            is.na(tp_res) ~ "NA",          
                            TRUE ~ "Normal"),
         alb_res = case_when(alb_res < 35 ~ "<35 g/L",     # liver disease, Crohn disease
                            is.na(alb_res) ~ "NA",
                             TRUE ~ "Normal"),
         fibs_med_diag = case_when(fibs_med_res >= 7.9 ~ "F2-4 (fibrosis or cirrhosis)",
                                   is.na(fibs_med_res) ~ "NA",
                                   TRUE ~ "F0-1 (no or mild fibrosis)"),
         cap_med_diag = case_when(cap_med_res >= 248 ~ "SLD",
                                  is.na(cap_med_res) ~ "NA",
                                  TRUE ~ "Normal")
         ) %>% 
  select(pid, alt_res, pal_res, ggt_res, blb_res, tp_res, alb_res, apri_score, fibs_med_res, apri_diag, fibs_med_diag, cap_med_res, cap_med_diag)

# SOURCES : Lala V, Zubair M, Minter DA. Liver Function Tests. [Updated 2023 Jul 30]. In: StatPearls [Internet]. Treasure Island (FL): StatPearls Publishing; 2024 Jan-. Available from: https://www.ncbi.nlm.nih.gov/books/NBK482489
## APRI & FIB-4 : Lee J, Vali Y, Boursier J, Spijker R, Anstee QM, Bossuyt PM, Zafarmand MH. Prognostic accuracy of FIB-4, NAFLD fibrosis score and APRI for NAFLD-related events: A systematic review. Liver Int. 2021 Feb;41(2):261-270. doi: 10.1111/liv.14669. PMID: 32946642; PMCID: PMC7898346.
## Fibrosis : Johannessen, A., Stockdale, A.J., Henrion, M.Y.R. et al. Systematic review and individual-patient-data meta-analysis of non-invasive fibrosis markers for chronic hepatitis B in Africa. Nat Commun 14, 45 (2023). https://doi.org/10.1038/s41467-022-35729-w
## Steatosis : Karlas, T., et al., Individual patient data meta-analysis of controlled attenuation parameter (CAP) technology for assessing steatosis. J Hepatol, 2017. 66(5): p. 1022-1030.

reactable(liver_markers)

Analysis

# ALAT
liver_markers %>%
  tabyl(alt_res) %>% 
  adorn_totals("row") %>% 
  adorn_pct_formatting(digits = 2)
##  alt_res   n percent
##     High  37   4.63%
##   Normal 762  95.37%
##    Total 799 100.00%
# PAL
liver_markers %>%
  tabyl(pal_res) %>% 
  adorn_totals("row") %>% 
  adorn_pct_formatting(digits = 2)
##    pal_res   n percent
##  >300 IU/L  17   2.13%
##         NA   4   0.50%
##     Normal 778  97.37%
##      Total 799 100.00%
# GGT
liver_markers %>%
  tabyl(ggt_res) %>% 
  adorn_totals("row") %>% 
  adorn_pct_formatting(digits = 2)
##   ggt_res   n percent
##  >50 IU/L  22   2.75%
##        NA   4   0.50%
##    Normal 773  96.75%
##     Total 799 100.00%
# Total Bilirubin
liver_markers %>%
  tabyl(blb_res) %>% 
  adorn_totals("row") %>% 
  adorn_pct_formatting(digits = 2)
##    blb_res   n percent
##  >1.1 mg/L  68   8.51%
##         NA  90  11.26%
##     Normal 641  80.23%
##      Total 799 100.00%
# Total Protein
liver_markers %>%
  tabyl(tp_res) %>% 
  adorn_totals("row") %>% 
  adorn_pct_formatting(digits = 2)
##   tp_res   n percent
##  <60 g/L   1   0.13%
##       NA 713  89.24%
##   Normal  85  10.64%
##    Total 799 100.00%
# Albumin
liver_markers %>%
  tabyl(alb_res) %>% 
  adorn_totals("row") %>% 
  adorn_pct_formatting(digits = 2)
##  alb_res   n percent
##  <35 g/L  69   8.64%
##       NA  93  11.64%
##   Normal 637  79.72%
##    Total 799 100.00%
# APRI
liver_markers %>%
  tabyl(apri_diag) %>% 
  adorn_totals("row") %>% 
  adorn_pct_formatting(digits = 2)
##        apri_diag   n percent
##    Mild Fibrosis  30   3.75%
##      No Fibrosis 761  95.24%
##  Severe Fibrosis   8   1.00%
##            Total 799 100.00%
# FibroScan LSM
liver_markers %>%
  tabyl(fibs_med_diag) %>% 
  adorn_totals("row") %>% 
  adorn_pct_formatting(digits = 2)
##                 fibs_med_diag   n percent
##    F0-1 (no or mild fibrosis) 731  91.49%
##  F2-4 (fibrosis or cirrhosis)  68   8.51%
##                         Total 799 100.00%
# FibroScan CAP
liver_markers %>%
  tabyl(cap_med_diag) %>% 
  adorn_totals("row") %>% 
  adorn_pct_formatting(digits = 2)
##  cap_med_diag   n percent
##        Normal 731  91.49%
##           SLD  68   8.51%
##         Total 799 100.00%
# Remove 'Bilirubin' (n=90), 'Total Protein' (n=713) and 'Albumin' (n=93) because of too many NAs
liver_markers <- liver_markers %>% 
  select(-blb_res, -tp_res, -alb_res)

I want to identify how many patients have discordant fibrosis diagnoses between ‘apri_diag’ and ‘fibs_med_diag’

discordant_patients <- liver_markers %>%
  filter((apri_diag == "No or Mild Fibrosis" & fibs_med_diag == "F2-4 (fibrosis or cirrhosis)") |
         (apri_diag == "Severe Fibrosis" & fibs_med_diag == "F0-1 (no or mild fibrosis)") |
         (apri_diag != "Mild Fibrosis" & apri_diag != "Severe Fibrosis" & apri_diag != "No Fibrosis") |
         (fibs_med_diag != "F0-1 (no or mild fibrosis)" & fibs_med_diag != "F2-4 (fibrosis or cirrhosis)"))

# Print discordant patients (n=2)
reactable(discordant_patients)

Let’s investigate why these two patients have discordant results between the APRI score and the FibroScan

bl_schisto_wide_25052024_noCoInf %>% 
  filter(pid %in% c("SENB1342", "SENB1628")) #...
## # A tibble: 2 × 224
##   pid      hiv_status enrol_d    center med   sen_yn country_orig region_orig
##   <chr>    <chr>      <date>     <fct>  <chr> <fct>  <fct>        <fct>      
## 1 SENB1342 HBV        2021-09-01 SMIT   6     yes    <NA>         Thiès      
## 2 SENB1628 HBV        2022-05-09 SMIT   6     yes    <NA>         Dakar      
## # ℹ 216 more variables: dpt_orig <fct>, res_time <dbl>, reg_act <fct>,
## #   dpt_act <fct>, birth_d <date>, age_en <dbl>, sex <fct>, rank <dbl>,
## #   marital <fct>, union <fct>, child <dbl>, education <fct>, employ <fct>,
## #   other_employ <chr>, health_ins <fct>, hiv_yn <fct>, type_HIV <chr>,
## #   hiv_test1 <fct>, hiv_test1_date <date>, hiv_test2 <fct>,
## #   hiv_test2_date <date>, CD4_last <dbl>, CD4_last_date <date>,
## #   hiv_arn_last <dbl>, hiv_arn_less <chr>, hiv_arn_date <date>, who <dbl>, …

Join ‘liver_markers’ with ‘SchistoHBV’ dataset

SchistoHBV <- left_join(SchistoHBV, liver_markers, by = "pid")
reactable(SchistoHBV)

URINE MARKERS

Create temporary dataset with all ‘urine markers’ variables needed

urine_markers <- bl_schisto_wide_25052024_noCoInf %>% 
  select(pid, protu_res, creu_res, albu_res, leubu_res, hembu_res, protbu_res, glubu_res)

# create 'protein/creatinine ratio' variable (https://doi.org/10.1080/10408363.2020.1723487)
urine_markers <- urine_markers %>% 
  mutate(upcr_ratio = protu_res / creu_res) %>%
  mutate(upcr_ratio = round(upcr_ratio, 2)) %>% 
  mutate(upcr_diag = case_when(upcr_ratio > 0.2 ~ "Proteinuria (>0.2 mg/mg)",
                               TRUE ~ "Normal (<0.2 mg/mg)")) %>%
  select(pid, protu_res, creu_res, upcr_ratio, upcr_diag, albu_res, leubu_res, hembu_res, protbu_res, glubu_res)

# create 'albumin/creatinine ratio' variable (https://doi.org/10.1097/JU.0000000000001005)
urine_markers <- urine_markers %>% 
  mutate(uacr_ratio = albu_res / creu_res) %>%
  mutate(uacr_ratio = round(uacr_ratio, 2)) %>% 
  mutate(uacr_diag = case_when(uacr_ratio > 0.3 ~ "Severe Albuminuria (>0.3 mg/mg)",
                               uacr_ratio <= 0.03 ~ "Normal (<0.03 mg/mg)",
                               TRUE ~ "Moderate Albuminuria (0.03-0.3 mg/mg)")) %>%
  select(pid, protu_res, creu_res, upcr_ratio, upcr_diag, albu_res, uacr_ratio, uacr_diag, 
         leubu_res, hembu_res, protbu_res, glubu_res)

reactable(urine_markers)

ULTRASOUND (US) DATA

Create temporary dataset with all ‘Ultrasound’ variables needed

bl_schisto_wide_25052024_noCoInf
## # A tibble: 799 × 224
##    pid      hiv_status enrol_d    center med   sen_yn country_orig region_orig
##    <chr>    <chr>      <date>     <fct>  <chr> <fct>  <fct>        <fct>      
##  1 SENB1002 HBV        2019-09-30 SMIT   1     yes    <NA>         Fatick     
##  2 SENB1003 HBV        2019-10-01 SMIT   1     yes    <NA>         Dakar      
##  3 SENB1004 HBV        2019-10-01 SMIT   1     yes    <NA>         Dakar      
##  4 SENB1005 HBV        2019-10-01 SMIT   1     yes    <NA>         Dakar      
##  5 SENB1006 HBV        2019-10-01 SMIT   1     yes    <NA>         Dakar      
##  6 SENB1007 HBV        2019-10-02 SMIT   1     yes    <NA>         Ziguinchor 
##  7 SENB1008 HBV        2019-10-02 SMIT   1     yes    <NA>         Kaolack    
##  8 SENB1009 HBV        2019-10-02 SMIT   1     yes    <NA>         Louga      
##  9 SENB1010 HBV        2019-10-02 SMIT   1     yes    <NA>         Diurbel    
## 10 SENB1011 HBV        2019-10-02 SMIT   1     no     Mali         <NA>       
## # ℹ 789 more rows
## # ℹ 216 more variables: dpt_orig <fct>, res_time <dbl>, reg_act <fct>,
## #   dpt_act <fct>, birth_d <date>, age_en <dbl>, sex <fct>, rank <dbl>,
## #   marital <fct>, union <fct>, child <dbl>, education <fct>, employ <fct>,
## #   other_employ <chr>, health_ins <fct>, hiv_yn <fct>, type_HIV <chr>,
## #   hiv_test1 <fct>, hiv_test1_date <date>, hiv_test2 <fct>,
## #   hiv_test2_date <date>, CD4_last <dbl>, CD4_last_date <date>, …